"""
李运辰

公众号：python研究者
"""


import requests
from lxml import etree
import json
import time
import openpyxl

# Guest names collected from every page, in scrape order.
name = []
# Guest types ("star type"), kept index-aligned with ``name``.
types = []
# Baidu Baike pages to scrape, one entry per season of the show.
url_list = [
    'https://baike.baidu.com/item/奔跑吧兄弟第一季#4_2',
    'https://baike.baidu.com/item/奔跑吧兄弟第二季/16414779',
    'https://baike.baidu.com/item/奔跑吧兄弟第三季#4_2',
    'https://baike.baidu.com/item/奔跑吧兄弟第四季#4_2',
    'https://baike.baidu.com/item/奔跑吧第一季/20433390?fromtitle=奔跑吧第1季&fromid=22645259&fr=aladdin#4_2',
    'https://baike.baidu.com/item/奔跑吧第二季/22421345?fromtitle=奔跑吧第2季&fromid=22645247&fr=aladdin#4_2',
    'https://baike.baidu.com/item/奔跑吧第三季/23284990?fromtitle=奔跑吧第3季&fromid=23285732&fr=aladdin',
    'https://baike.baidu.com/item/奔跑吧第四季/24701671?fromtitle=奔跑吧第4季&fromid=50003758&fr=aladdin',
    'https://baike.baidu.com/item/奔跑吧·黄河篇/53052048#4',
]
# Output workbook: a new sheet is inserted at position 0 and receives the
# header row (column 1 = name, column 2 = star type).
outwb = openpyxl.Workbook()
outws = outwb.create_sheet(index=0)
for column_index, header_title in enumerate(("名字", "明星类型"), start=1):
    outws.cell(row=1, column=column_index, value=header_title)

# HTTP headers sent with every page request (a desktop Chrome User-Agent).
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36',
}
### Scrape every season page and collect each guest's name and type
def _section_between(text, start_marker, end_marker):
    """Return the page fragment delimited by two marker strings.

    The fragment is the text that follows the first occurrence of
    *start_marker* (up to its next occurrence, if any), cut off at the first
    occurrence of *end_marker*.  When *end_marker* is absent the fragment is
    not cut.  Returns ``None`` when *start_marker* does not occur, so the
    caller can skip the page instead of failing with an ``IndexError``.
    """
    pieces = text.split(start_marker)
    if len(pieces) < 2:
        return None
    return pieces[1].split(end_marker)[0]


def _guest_sections(text, url):
    """Return the ``(start_marker, end_marker)`` pairs locating guest tables.

    The markers depend on which headings appear in *text* and, for two of the
    seasons, on the page *url*.  Both top-level checks are independent, so a
    page may yield zero, one or two pairs.
    """
    markers = []
    if "分期嘉宾" in text:
        end_marker = "表演嘉宾" if "表演嘉宾" in text else "人物关系"
        markers.append(("分期嘉宾", end_marker))
    if "嘉宾介绍" in text:
        if "奔跑吧第4季" in url:
            markers.append(("飞行嘉宾", "节目原声"))
        elif "奔跑吧·黄河篇" in url:
            markers.append(("飞行嘉宾", "获奖记录"))
        else:
            markers.append(("嘉宾介绍", "人物关系"))
    return markers


def _parse_guest_rows(section_html):
    """Return ``(guest_name, guest_type)`` pairs parsed from an HTML fragment.

    Every ``<tr>`` except the first is inspected (the first is presumably the
    table header -- TODO confirm against the live page).  The name is the
    first bold link text inside a ``div.para``; the type is the first direct
    text node of a ``div.para``, truncated at the first full-width comma and
    stripped of full-width opening parentheses.  Rows lacking either value
    are skipped so that names and types always stay paired.
    """
    if not section_html or not section_html.strip():
        return []
    selector = etree.HTML(section_html)
    if selector is None:
        return []

    guests = []
    for row in selector.xpath('.//tr')[1:]:
        names = row.xpath(".//div[@class='para']/b/a/text()")
        texts = row.xpath(".//div[@class='para']/text()")
        if not names or not texts:
            # Incomplete row: skipping keeps the two result lists aligned.
            continue
        guest_type = texts[0].split("，")[0].replace("（", "")
        guests.append((names[0], guest_type))
    return guests


for u in url_list:
    url = u
    print(url)
    # A timeout prevents one stalled connection from hanging the whole run.
    res = requests.get(url, headers=headers, timeout=30)
    res.encoding = 'utf-8'
    text = res.text

    for start_marker, end_marker in _guest_sections(text, url):
        section_html = _section_between(text, start_marker, end_marker)
        if section_html is None:
            # The expected start heading is missing on this page.
            continue
        for guest_name, guest_type in _parse_guest_rows(section_html):
            name.append(guest_name)
            types.append(guest_type)

# Write one spreadsheet row per guest, starting right below the header row.
for offset, guest_name in enumerate(name):
    sheet_row = offset + 2
    outws.cell(row=sheet_row, column=1, value=str(guest_name))
    outws.cell(row=sheet_row, column=2, value=str(types[offset]))

# Save the workbook to disk.
# NOTE(review): openpyxl writes the .xlsx (Office Open XML) format, so the
# ".xls" extension may stop spreadsheet applications from opening the file --
# confirm whether the name should end in ".xlsx".
outwb.save("奔跑吧嘉宾名单-李运辰.xls")


# Print both result lists followed by their lengths as a quick sanity check.
for summary in (name, types, len(name), len(types)):
    print(summary)